// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/strings/pattern.h"

#include "base/third_party/icu/icu_utf.h"

namespace base {

namespace {

    static bool IsWildcard(base_icu::UChar32 character)
    {
        return character == '*' || character == '?';
    }

    // Move the strings pointers to the point where they start to differ.
    template <typename CHAR, typename NEXT>
    static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
        const CHAR** string, const CHAR* string_end,
        NEXT next)
    {
        const CHAR* escape = NULL;
        while (*pattern != pattern_end && *string != string_end) {
            if (!escape && IsWildcard(**pattern)) {
                // We don't want to match wildcard here, except if it's escaped.
                return;
            }

            // Check if the escapement char is found. If so, skip it and move to the
            // next character.
            if (!escape && **pattern == '\\') {
                escape = *pattern;
                next(pattern, pattern_end);
                continue;
            }

            // Check if the chars match, if so, increment the ptrs.
            const CHAR* pattern_next = *pattern;
            const CHAR* string_next = *string;
            base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
            if (pattern_char == next(&string_next, string_end) && pattern_char != CBU_SENTINEL) {
                *pattern = pattern_next;
                *string = string_next;
            } else {
                // Uh oh, it did not match, we are done. If the last char was an
                // escapement, that means that it was an error to advance the ptr here,
                // let's put it back where it was. This also mean that the MatchPattern
                // function will return false because if we can't match an escape char
                // here, then no one will.
                if (escape) {
                    *pattern = escape;
                }
                return;
            }

            escape = NULL;
        }
    }

    template <typename CHAR, typename NEXT>
    static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next)
    {
        while (*pattern != end) {
            if (!IsWildcard(**pattern))
                return;
            next(pattern, end);
        }
    }

    template <typename CHAR, typename NEXT>
    static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
        const CHAR* pattern, const CHAR* pattern_end,
        int depth,
        NEXT next)
    {
        const int kMaxDepth = 16;
        if (depth > kMaxDepth)
            return false;

        // Eat all the matching chars.
        EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

        // If the string is empty, then the pattern must be empty too, or contains
        // only wildcards.
        if (eval == eval_end) {
            EatWildcard(&pattern, pattern_end, next);
            return pattern == pattern_end;
        }

        // Pattern is empty but not string, this is not a match.
        if (pattern == pattern_end)
            return false;

        // If this is a question mark, then we need to compare the rest with
        // the current string or the string with one character eaten.
        const CHAR* next_pattern = pattern;
        next(&next_pattern, pattern_end);
        if (pattern[0] == '?') {
            if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                    depth + 1, next))
                return true;
            const CHAR* next_eval = eval;
            next(&next_eval, eval_end);
            if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
                    depth + 1, next))
                return true;
        }

        // This is a *, try to match all the possible substrings with the remainder
        // of the pattern.
        if (pattern[0] == '*') {
            // Collapse duplicate wild cards (********** into *) so that the
            // method does not recurse unnecessarily. http://crbug.com/52839
            EatWildcard(&next_pattern, pattern_end, next);

            while (eval != eval_end) {
                if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                        depth + 1, next))
                    return true;
                eval++;
            }

            // We reached the end of the string, let see if the pattern contains only
            // wildcards.
            if (eval == eval_end) {
                EatWildcard(&pattern, pattern_end, next);
                if (pattern != pattern_end)
                    return false;
                return true;
            }
        }

        return false;
    }

    struct NextCharUTF8 {
        base_icu::UChar32 operator()(const char** p, const char* end)
        {
            base_icu::UChar32 c;
            int offset = 0;
            CBU8_NEXT(*p, offset, end - *p, c);
            *p += offset;
            return c;
        }
    };

    struct NextCharUTF16 {
        base_icu::UChar32 operator()(const char16** p, const char16* end)
        {
            base_icu::UChar32 c;
            int offset = 0;
            CBU16_NEXT(*p, offset, end - *p, c);
            *p += offset;
            return c;
        }
    };

} // namespace

bool MatchPattern(const StringPiece& eval, const StringPiece& pattern)
{
    return MatchPatternT(eval.data(), eval.data() + eval.size(),
        pattern.data(), pattern.data() + pattern.size(),
        0, NextCharUTF8());
}

bool MatchPattern(const StringPiece16& eval, const StringPiece16& pattern)
{
    return MatchPatternT(eval.data(), eval.data() + eval.size(),
        pattern.data(), pattern.data() + pattern.size(),
        0, NextCharUTF16());
}

} // namespace base
